# Resume all interrupted trainings found under the current directory, including DDP (multi-GPU) trainings

import os
import sys
from pathlib import Path

import torch
import yaml

sys.path.append('./')  # add the current directory to the module search path

port = 29500  # base --master_port; incremented per DDP job (ports below 1024 are privileged, so do not start from 0)
path = Path('').resolve()  # search root: the current working directory
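
# Relaunch every unfinished run found below the search root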
for last in path.rglob('*/**/last.pt'):
    ckpt = torch.load(last, map_location='cpu')  # load on CPU; no GPU memory needed just to inspect the checkpoint
    if ckpt['optimizer'] is None:  # a checkpoint without optimizer state is from a finished run; nothing to resume
        continue
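
    # Load the training options (opt.yaml) saved two directories above the checkpoint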
    with open(last.parent.parent / 'opt.yaml') as f:
        opt = yaml.safe_load(f)
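
    # Determine how many devices the run requested and whether it needs DDP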
    d = [x for x in opt['device'].split(',') if x]  # e.g. '0,1' -> ['0', '1']; '' -> []
    nd = len(d)  # number of requested devices
    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # DDP if multiple GPUs requested or available
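
    # Rebuild the launch command: distributed for multi-GPU runs, plain python otherwise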
    if ddp:  # multi-GPU
        port += 1  # give each DDP job its own master port so concurrent resumes do not collide
        nproc = nd if nd else torch.cuda.device_count()  # fall back to all visible GPUs when no device list was given
        cmd = f'python -m torch.distributed.launch --nproc_per_node {nproc} --master_port {port} train.py --resume {last}'
    else:  # single-GPU or CPU
        cmd = f'python train.py --resume {last}'
    cmd += ' > /dev/null 2>&1 &'  # silence output and run in the background
    print(cmd)
    os.system(cmd)